package com.spbsu.crawl.bl.helpers;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import gnu.trove.map.hash.TObjectIntHashMap;
import org.jetbrains.annotations.NotNull;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Bidirectional dictionary between categorical feature values (strings) and dense integer ids.
 *
 * <p>Ids are handed out in order of first registration, starting from 0, so the id of a value is
 * its position in the list of distinct values seen so far. The dictionary can be written to and
 * restored from a JSON document of the form {@code {"inverse_index": ["first", "second", ...]}}.
 *
 * <p>Every method that touches the dictionary state synchronizes on the instance, so lookups
 * never observe the underlying hash map or list while a new entry is being inserted.
 *
 * Created by noxoomo on 14/07/16.
 */
public class CategoricalFeaturesMap {
  private static final Logger log = Logger.getLogger(CategoricalFeaturesMap.class.getName());

  /** Shared mapper: it is never reconfigured after construction, so one instance is enough. */
  private static final ObjectMapper MAPPER = new ObjectMapper();

  /** value -> id. */
  private final TObjectIntHashMap<String> direct = new TObjectIntHashMap<>();
  /** id -> value; index {@code i} holds the value whose id is {@code i}. */
  private final List<String> inverse = new ArrayList<>();

  /**
   * Returns the id of {@code value}, registering it under the next free id if it is not known yet.
   *
   * @param value the categorical value to look up or register
   * @return the id associated with {@code value}
   */
  private synchronized int addNewEntryAndReturn(String value) {
    // containsKey is required: get() alone cannot tell "absent" from a legitimately stored id,
    // because the map reports absence through an int sentinel.
    if (direct.containsKey(value)) {
      return direct.get(value);
    }
    final int nextId = direct.size();
    direct.put(value, nextId);
    inverse.add(value);
    return nextId;
  }

  /**
   * Returns the id of the given value, assigning a fresh id on first sight.
   *
   * @param message the categorical value
   * @return the id associated with {@code message}
   */
  public synchronized int value(@NotNull String message) {
    // The whole lookup-or-insert runs under the instance lock; reading the map outside the lock
    // could race with a concurrent insertion.
    return addNewEntryAndReturn(message);
  }

  /**
   * Returns the value registered under the given id.
   *
   * @param id the id to resolve
   * @return the value with that id, or {@code null} if {@code id} is negative or not assigned yet
   */
  public synchronized String value(int id) {
    if (id >= 0 && id < inverse.size()) {
      return inverse.get(id);
    }
    return null;
  }

  /**
   * @return the number of distinct values registered so far
   */
  public synchronized int dictSize() {
    return inverse.size();
  }

  /**
   * Writes the dictionary to {@code file} as UTF-8 encoded JSON.
   *
   * <p>I/O failures are logged at {@link Level#WARNING} (with the exception attached) and are not
   * propagated to the caller.
   *
   * @param file the destination file; it is created or overwritten
   */
  public void save(final File file) {
    final String json = new JsonProgress(this).toJson();
    // try-with-resources closes (and thereby flushes) the writer even when write() fails.
    try (Writer writer = new BufferedWriter(
        new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
      writer.write(json);
    } catch (IOException e) {
      log.log(Level.WARNING, "Can't save cat feature index to file", e);
    }
  }

  /**
   * Restores a dictionary from a JSON file, decoding it as UTF-8.
   *
   * @param file the file to read
   * @return a new map holding the stored values with ids assigned in stored order
   * @throws IOException if the file cannot be opened or read
   * @throws RuntimeException if the file content cannot be parsed as the expected JSON
   */
  public static CategoricalFeaturesMap load(final File file) throws IOException {
    final StringBuilder text = new StringBuilder();
    try (Reader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
      final char[] buffer = new char[8192];
      int read;
      while ((read = reader.read(buffer)) != -1) {
        text.append(buffer, 0, read);
      }
    } catch (IOException e) {
      // Level.ALL is a logger-configuration level; a record logged at it is effectively dropped.
      log.log(Level.SEVERE, "Error: can't load cat features map", e);
      throw e;
    }
    return JsonProgress.fromJson(text.toString());
  }

  /**
   * @return a copy of the id -> value list, taken under the instance lock
   */
  private synchronized List<String> snapshot() {
    return new ArrayList<>(inverse);
  }

  /**
   * JSON representation of the dictionary: only the ordered list of values is stored, the
   * value -> id direction is rebuilt while loading.
   */
  @JsonIgnoreProperties(ignoreUnknown = true)
  private static class JsonProgress {
    @JsonProperty("inverse_index")
    final List<String> inverseIndex;

    /** Creates an empty instance; used by Jackson during deserialization. */
    public JsonProgress() {
      inverseIndex = new ArrayList<>();
    }

    /**
     * Captures the current content of {@code map}.
     *
     * @param map the dictionary to serialize
     */
    public JsonProgress(final CategoricalFeaturesMap map) {
      // Copy instead of sharing the live list, so serialization cannot race with insertions.
      this.inverseIndex = map.snapshot();
    }

    /**
     * @return the stored values, ordered by id
     */
    public List<String> inverseIndex() {
      return inverseIndex;
    }

    /**
     * @return this object rendered as a JSON string
     * @throws RuntimeException if Jackson fails to serialize the object
     */
    String toJson() {
      try {
        return MAPPER.writeValueAsString(this);
      } catch (JsonProcessingException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * Parses {@code text} and rebuilds the dictionary by registering the stored values in order.
     *
     * @param text JSON produced by {@link #toJson()}
     * @return the reconstructed dictionary
     * @throws RuntimeException if {@code text} cannot be parsed
     */
    static CategoricalFeaturesMap fromJson(String text) {
      final JsonProgress data;
      try {
        data = MAPPER.readValue(text, JsonProgress.class);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      final CategoricalFeaturesMap map = new CategoricalFeaturesMap();
      data.inverseIndex().forEach(map::addNewEntryAndReturn);
      return map;
    }
  }
}